perm filename TEXSYN.SAI[1,DEK]1 blob
sn#310517 filedate 1977-10-14 generic text, type C, neo UTF8
COMMENT ⊗ VALID 00016 PAGES
C REC PAGE DESCRIPTION
C00001 00001
C00003 00002 The syntax module of TEX.
C00007 00003 A list of the command codes
C00015 00004 The hash table: hash,eqtb,idlen,idlev,idtyp
C00019 00005 Saving and restoring eqtb values: eqdefine,newsavelevel,destroy,restore
C00028 00006 Hash table algorithms: idlookup,controlseq,idname,hashentry,eqlink
C00036 00007 The input stacks: inbuf,curbuf,state,loc,recovery,filename,parstack
C00042 00008 Tokens, token lists, and the diagnostic routine dumplist
C00048 00009 Maintaining the input stacks: pushinput,popinput,initin,dumpcontext,inslist
C00054 00010 The basic input procedure getnext and its cousins gettok,getncnext,getnctok
C00062 00011 Defining user control sequences and output routines: macrodef,scantoks
C00069 00012 Calling user macros: macrocall
C00076 00013 Accessing user's files: requirefile, definefont
C00081 00014 Basic scanning routines: backinput,scandigit,scanlb,scanstring,scannumber
C00086 00015 Further scanning routines: scanlength,scanglue,scanspec
C00090 00016 Still more scanning routines: passblock,insnum,scancond
C00093 ENDMK
C⊗;
comment The syntax module of TEX.
(It is wise to read the memory allocation sections of TEXSYS
before delving very deeply into the following code.)
The purpose of these routines is to deliver the user's input to
the semantics module of TEX, one token at a time. This module
also contains utility subroutines for syntactic operations such as
the scanning of glue specifications. The save-and-restore mechanism,
which maintains the current meanings of control sequences, appears here too.
Each call of the procedure "getnext" sets the value of two variables
"curcmd" and "curcar", representing the next input token.
curcmd denotes a command code,
curchar denotes a character code or other modifier of the command code.
The semantics module acts as an interpretive routine responding to these commands.
Underneath this external behavior of "getnext" is all the machinery necessary
to convert from character files to tokens. At a given time we may be partially
finished reading some files (when a \require was sensed), partially finished
expanding some user-defined macro and perhaps one of its parameters, partially
finished generating some of the standard code in an \halign, and so on.
When reading a character file, comments and redundant blank spaces must be
removed, paragraphs must be recognized. Furthermore there are occasions
in which the scanner has looked ahead for a word like "plus" but has found
only part of that word, hence a few characters must be fed back and scanned
again. To handle all these situations, there are various stacks which
hold information about the incomplete activities, and a finite state control
for each level of the input control. These stacks record the current state
of an implicitly recursive process, but the procedures themselves are
nonrecursive. This has been done so that low-level implementations of the
same algorithms are easy to create and because getnext acts as a coroutine of
the semantic actions;
require "TEXHDR.SAI" source_file;
internal integer curcmd # the current command code appearing in the input;
internal integer curchar # the current character code appearing in the input;
comment A list of the command codes;
comment The following definitions attach numeric codes to the various
"commands" interpreted by TEX. The symbolic names of these codes are
used elsewhere. Sometimes the ordering of the codes is important
(e.g. we might branch on cmd ≥ vjust), so the codes are not completely
arbitrary;
internaldef escape=0 # escape delimiter (\ in TEX manual);
internaldef lbrace=1 # begin block symbol ( { );
internaldef rbrace=2 # end block symbol ( } );
internaldef mathbr=3 # math break ( $ );
internaldef tabmrk=4 # tab mark ( ⊗ );
internaldef carret=5 # carriage return and comment mark ( % );
comment carret is also used as the command code for \cr;
internaldef macprm=6 # macro parameter ( # );
internaldef supmrk=7 # superscript ( ↑ );
internaldef submrk=8 # subscript ( ↓ );
internaldef ignore=9 # chars to ignore;
internaldef spacer=10 # chars treated as blank space;
internaldef letter=11 # chars treated as letters;
internaldef otherchar=12 # none of the above character types;
internaldef parend=13 # end of paragraph;
internaldef match=14 # macro parameter matching;
internaldef outpar=ignore # output a macro parameter;
internaldef call=15 # call a user-defined macro;
internaldef ext=16 # extensions to basic TEX (\x);
internaldef assignreal=17 # user-defined length;
internaldef assignglue=18 # user-defined glue;
internaldef font=19 # user-defined current font;
comment Codes "assignreal" thru "font", inclusive, are not redefinable by \def,
since their eqtb entries are used by TEX semantics. Codes "call" thru
"assignglue", inclusive, have eqtb entries whose link fields may point into mem,
so they should not be changed without deallocation;
internaldef def=20 # macro definition (\def,\gdef);
internaldef output=21 # output routine definition (\output);
internaldef require=22 # required input file (\require);
internaldef trace=23 # begin macro tracing (\trace);
internaldef stop=24 # end of input (\end);
internaldef ddt=25 # emergency debugging (\ddt);
internaldef ascii=26 # untypeable character (\ascii);
internaldef chcode=27 # change chartype table (\chcode);
internaldef fntfam=28 # declare font family (\fntfam);
internaldef setcpage=29 # set current page number (\setcpage);
internaldef advcpage=30 # increase current page number (\advcpage);
internaldef cpage=31 # insert current page number (\cpage);
internaldef ifeven=32 # conditional on cpage even (\ifeven);
internaldef ifT=33 # conditional on character T (\ifT);
internaldef elsecode=34 # delimiter for conditionals (\else);
internaldef box=35 # saved box (\box,\page) or justification(\hjust,\vjust);
FIXTHIS: close up the codes;
internaldef halign=39 # horizontal table alignment (\halign);
internaldef valign=40 # vertical table alignment (\valign);
internaldef endv=41 # end of vlist in halign or valign template;
internaldef nalign=42 # insertion into halign or valign (\nalign);
internaldef vskip=43 # vertical glue (\vskip,\vfill);
internaldef hskip=44 # horizontal glue (\hskip,\hfill);
internaldef hrule=45 # horizontal rule (\hrule);
internaldef vrule=46 # vertical rule (\vrule);
internaldef topbotins=47 # inserted vlist (\topinsert or \botinsert);
internaldef save=48 # save a box (\save);
internaldef topbotmark=49 # insert mark (\topmark,\botmark);
internaldef mark=50 # define a mark (\mark);
internaldef penalty=51 # specify badness of break (\penalty);
internaldef noindent=52 # begin nonindented paragraph (\noindent);
internaldef eject=53 # eject page here (\eject);
internaldef hmove=54 # horizontal motion of box (\moveleft,\moveright);
internaldef vmove=55 # vertical motion of box (\raise,\lower);
internaldef discr=56 # discretionary hyphen (\-,\*);
internaldef accent=57 # attach accent to character (\+);
internaldef eqno=58 # insert equation number (\eqno);
internaldef mathonly=59 # character or token allowed in mathmode only;
internaldef exspace=60 # explicit space (\ );
internaldef maxopcode=60 # the largest code number;
comment The hash table: hash,eqtb,idlen,idlev,idtyp;
comment Control sequences, some of which are predeclared, are recorded in a
hash table, with an associated table of their equivalent meanings. Linear
probing (e.g., Algorithm 6.4L in ACP) is used to access this table, which is
in three parts: The last 128 words are for the command codes associated with
characters read from external files (these are changed by \chcode).
The next-to-last 128 words are for single-character control sequences
which are addressed directly. The first words are for packed representations
of longer control seqences, using six bits for the first letter (in order to
distinguish upper and lower case) and five bits for each remaining letter,
left justified in the word.
Entries in the equivalents table contain several fields:
idlen (length-1) mod 8 of the name
idlev level of {...} nesting at which this equivalent was defined
idcmd command code for the name
link pointer into mem or modifier of idcmd
The value of idlev is nonzero whenever the equivalent is defined: level 1
stands for initial default values and user definitions not in braces.
(Exception: idlev=0 in the last 128 words, as these words are treated specially.)
The value of idcmd is used to determine, among other things, what to do when
the equivalent value changes -- for example, if link points to a node
representing glue, we probably want to call procedure deletegluelink when
this field changes;
internaldef hashsize = 353 # hashtable size, should be prime and < 2↑chars-127;
preload_with 0 # the following array will be initialized upon loading;
internal integer array hash[0:hashsize-1] # hash table for packed names;
preload_with 0 # the following array will be initialized upon loading;
internal integer array eqtb[0:hashsize+255] # equivalents of hash table entries;
internaldef chartype(c) = ⊂eqtb[c+(hashsize+128)] # cmds associated with chars;
internaldef idlens=3,idlend=links # idlen field in eqtb;
internaldef idlevs=5,idlevd=idlens+idlend # idlev field in eqtb;
internaldef idcmdd=idlevs+idlevd,idcmds=bitsperwd-idcmdd # idcmd field in eqtb;
comment Saving and restoring eqtb values: eqdefine,newsavelevel,destroy,restore;
comment The nested structure provided by { and } blocks in TEX means that
eqtb entries of outer blocks should be saved and restored. Furthermore,
it is often necessary to free up some memory when an eqtb entry is changed.
The procedure eqdefine is used to set a new eqtb entry. If a previous value
was defined at the same nesting level, it is destroyed (using procedure
"destroy" which frees memory if appropriate), and the new value is inserted.
If a previous value was defined at an outer nesting level (indicated by its
idlev field), the old value is placed on savestack and the new value is
inserted. At the end of a nesting level, i.e., when the } is sensed, the
savestack is used to restore the outer values and the inner ones are destroyed.
Entries on savestack are of three main forms:
"-c" where c is an ending-routine code
denotes the first entry on a given nesting level, placed on savestack when
{ is sensed. These codes are defined in TEXSEM (cf. the processing of
rbrace in main_control), they indicate what action to perform when the }
comes along. Furthermore, some routines such as hjust and halign place
another word or two onto savestack, immediately below the "-c", denoting
parameters that tell the desired final size and disposition of the box. These
parameters are removed at the time the -c is removed, so the save and restore
routines of concern to us here do not have to know about such extra words.
value,index two words, the top word being ≥0
means that when } is sensed eqtb[index] should be reset to value.
(1,index) one word, the index in the link field
means that when } is sensed eqtb[index] and hash[index] should be reset to zero.
Procedure newsavelevel is called when a { is sensed, and restore is called
when a } is sensed;
internal integer curlev # the current level of nesting, times 2↑idlevd;
internaldef savesize = 100 # size of savestack;
internal integer saveptr # first unused entry on savestack;
internal integer array savestack[0:savesize+1] # place to save dormant eqtb entries;
comment By saying "+1" instead of "-1" on the previous line, we make it possible
to avoid testing for saveptr overflow, up to twice in a row (excuse the trick);
internaldef level1 = 1 lsh idlevd;
internal procedure initsave # initialize the save-restore mechanism;
begin curlev ← level1;
saveptr←0;
end;
procedure destroy(integer eqtbval);
begin comment Frees memory, if necessary, when the given value from eqtb is
to be forgotten;
integer p,t;
if(t←field(idcmd,eqtbval))≤assignglue and t≥call then
begin p←field(link,eqtbval);
case t of begin
[call] deleterclink(p) # p points to reference count
of token list for user-defined macro;
[ext] eqdestroyext(p) # possible extension to TEX;
[assignreal] freeavail(p) # p points to real value;
[assignglue] deletegluelink(p) # p points to glue node;
else confusion
end;
end;
end;
internal procedure eqdefine(integer index,cmd,link) # change eqtb entry;
begin comment This procedure defines an eqtb entry having specified idcmd
and link fields, and saves the former value if appropriate;
integer t,l;
l←ufield(idlev,t←eqtb[index]);
if l=curlev then
destroy(t) # redefinition on same level;
else if l>0 and curlev>level1 then
begin comment save definition on old level;
if saveptr≥savesize-1 then overflow(savesize);
savestack[saveptr]←t; savestack[saveptr+1]←index;
saveptr←saveptr+2 # store two words on savestack;
end;
comment if l=0 this is either a \gdef or the first definition of the control
sequence. In the latter case, a one-word entry was placed on savestack
when the control sequence was entered into the hash table;
eqtb[index]←ufield(idlen,t) + (cmd lsh idcmdd) + curlev + link;
end;
internal procedure chcodedef(integer index,value) # eqdefine for char codes;
begin if curlev > level1 then
begin if saveptr≥savesize-1 then overflow(savesize);
savestack[saveptr]←eqtb[index+(hashsize+128)];
savestack[saveptr+1]←index+(hashsize+128);
saveptr←saveptr+2 # store two words on savestack;
end;
eqtb[index+(hashsize+128)]←value;
end;
internal integer procedure restore # clears off top nesting level of savestack
and returns the ending-routine code;
begin curlev ← curlev - level1;
if curlev then
while true do
begin saveptr←saveptr-1; t←savestack[saveptr] # get top entry;
if t<0 then return(-t);
if t≥refct1 then
begin comment delete control sequence from hash table;
eqtb[t ← field(link,t)] ← 0;
if t < hashsize then hash[t]←0;
end
else begin comment restore old eqtb entry;
destroy(eqtb[t]) # after properly disposing of the present one;
saveptr←saveptr-1;
eqtb[t]←savestack[saveptr];
if t=fontloc then
begin comment This special case is detected for efficiency
so that curfont and ucurfont are always current;
curfont←eqtb[t] land '77; ucurfont←curfont lsh 7;
end;
end;
end
else begin comment curlev mustn't become zero, preserve definitions at level 1;
curlev←level1; return(bottomlevel);
end;
end;
internal procedure newsavelevel(integer endcode) # starts new nesting level;
begin comment The specified ending-routine code is stored on savestack,
initiating a new level of nesting;
if saveptr ≥ savesize then overflow(savesize);
savestack[saveptr] ← -endcode;
saveptr ← saveptr+1;
if(curlev←curlev+level1) ≥ 1 lsh(idlevd+idlevs) then overflow(idlevs);
end;
comment Here is a list of the ending-routine codes used;
internaldef bottomlevel=1,simpleblock=2,trueend=3,aligncode=4,mathcode=5,
outputend=6,nalignend=7,botinsend=8,topinsend=botinsend+1,justend=10;
comment Hash table algorithms: idlookup,controlseq,idname,hashentry,eqlink;
internal integer hashentry # the most recent hash table location;
internal procedure idlookup(integer id,len) # searches the hashtable;
begin comment The packed name "id" whose idlen ufield is "len",
or (alternatively) the single-character code "id", is looked up in
the hash table. If not found, it is entered, and the savestack is adjusted
so that the entry will be cleared at the close of the current nesting
level. Upon exit, the appropriate index for this symbol in eqtb will appear
in the global variable "hashentry";
boolean thruonce;
if id<'200 then
begin comment single character id;
hashentry ← id+hashsize;
if eqtb[hashentry]≠0 then return;
end
else begin thruonce←false;
hashentry ← (abs(id)+len) mod hashsize;
while((t←hash[hashentry])≠0 and t≠id)or ufield(idlen,eqtb[hashentry])≠len do
begin hashentry←hashentry-1 # move to next position;
if hashentry<0 then
begin if thruonce then overflow(hashsize);
thruonce←true; hashentry←hashentry+hashsize # cyclically;
end;
end;
if t≠0 then return;
hash[hashentry]←id;
end;
comment new control sequence encountered;
eqtb[hashentry]←len+curlev;
if curlev>level1 then
begin if saveptr≥savesize then overflow(savesize);
savestack[saveptr]←refct1+hashentry # special savestack entry;
saveptr←saveptr+1;
end;
end;
procedure controlseq # gets a packed name from the input;
begin comment This procedure removes a control sequence from the string variable
curbuf, assuming that the initial escape character \ has already been removed.
Then this control sequence is found in the hashtable, and hashentry is set;
integer id,len,d;
id←lop(curbuf) # remove first character;
if id='15 then
begin comment carriage return should become a space;
id←'40; curbuf←'15;
end;
comment note that in \% the % should not be treated as a comment delimiter;
len←0 # len represents (length-1) lsh idlend;
if chartype(id)=letter and chartype(curbuf)=letter then
begin comment two or more letters in the control sequence;
d←bitsperwd-6; id←id lsh d # pack first character;
do begin id←id+((lop(curbuf)land'37)lsh(d←d-5)) #
if d≤-5 this shifts the character out of sight;
len←len+(1 lsh idlend);
end until chartype(curbuf)≠letter;
end;
idlookup(id,len land('7 lsh idlend));
end;
internal string procedure idname(integer h) # the name associated with eqtb[h];
comment This is sort of an inverse to the controlseq procedure;
if h≥hashsize then return(h-hashsize) else
begin integer t; string s;
define lettersperwd = ((bitsperwd-6)div 5)+1 # number of complete letters;
define leastsiglet = 2↑(bitsperwd-5*lettersperwd+4)-1 # mask for rthand letter;
t←(hash[h] lsh(5*lettersperwd+1-bitsperwd))rot(bitsperwd-5*lettersperwd+5);
s←(t land'77)lor'100; t←t land (-'100) # remove first letter;
while t land ('37 rot -5) do
begin t←t rot 5; s←s&((t land'37)lor'140);
end;
if hash[h] land leastsiglet then
begin t←field(idlen,eqtb[h]);
while t≠(lettersperwd-1) do
begin t←(t-1)mod 8; s←s&"x" # add x's to reach required length;
end;
end;
return(s);
end;
comment The following global variables are set to positions in the eqtb,
for reference by the semantic routines;
internaldef locsize=10 # size of locs array for storing eqtb locations;
internal integer array locs[0:locsize-1];
internaldef hsizeloc=⊂locs[0]⊃, vsizeloc=⊂locs[1]⊃, parindentloc=⊂locs[2]⊃,
lineskiploc=⊂locs[3]⊃, baselineskiploc=⊂locs[4]⊃, parskiploc=⊂locs[5]⊃,
dispskiploc=⊂locs[6]⊃, topskiploc=⊂locs[7]⊃, botskiploc=⊂locs[8]⊃,
tabskiploc=⊂locs[9]⊃ # allocation of the "loc" variables;
internaldef fontloc = hashsize+":" # eqtb location for \:;
internaldef xloc(x) = ⊂x⊃&"loc" # eqtb location for x;
internaldef eqlink(x) = ⊂field(link,eqtb[xloc(x)])⊃ # stored link field for x;
internal integer escapechar # set to the first character of user input;
comment This convention ensures that escapechar is a character the user can type;
comment The input stacks: inbuf,curbuf,state,loc,recovery,filename,parstack;
Comment TEX uses two different conventions for representing stacks.
1) A sequential stack in which there is frequent access to the top
entry, and the stack is essentially never empty. Then the top entry is kept
in a global variable (even better would be a register), and the other entries
are in stack[0] thru stack[ptr-1]. Example: The main input stacks.
2) A sequential stack with infrequent top access. Then the stack
contents are in stack[0] thru stack[ptr-1]. Example: The save stack.
The state of the scanning routine appears in the following stacks, maintained
with convention #1:;
internaldef stacksize=10 # maximum number of simultaneous input sources;
internal string array inbufstack[0:stacksize]; internal string inbuf
# current lines being input from a character file;
internal string array curbufstack[0:stacksize]; internal string curbuf
# the parts of inbuf that haven't yet been input;
internal string array filenamestack[0:stacksize]; internal string filename
# the names of the current character files;
internal integer array statestack[0:stacksize]; internal integer state
# current scanner state codes;
internal integer array locstack[0:stacksize]; internal integer loc
# current scanner locations;
internal integer array recoverystack[0:stacksize]; internal integer recovery
# information about what to do when done on each level;
comment The upper limit in these declarations is stacksize rather than stacksize-1
so that the dumpcontext routine doesn't cause embarrassing stack overflow;
comment There are just four state codes:;
internaldef tokenlist=0 # scanning a token list;
internaldef midline=1 # scanning a line of characters;
internaldef skipblanks=2+otherchar # like midline but ignoring blanks;
internaldef newline=3+2*otherchar # beginning a new line of characters;
comment When the state specifies reading from an external character file (i.e.,
when state ≠ tokenlist), inbuf contains the current line, and curbuf contains
the remains of the current line as its characters are being lopped off.
String filename is the name of the file -- this is used only for printing error
messages and returning to the editor (cf. the error procedure in TEXSYS).
The loc contains page number and line number of the current line, in its
respective info and link fields. The channel number appears in recovery.
A null filename denotes input from the user terminal, and recovery not used
in this case since such input never reaches the end-of-file.
When the state specifies reading from an internal linked list of tokens
(i.e., state=tokenlist), inbuf and curbuf and filename are not used.
The loc points to the next token to be scanned, and recovery contains information
about what to do when reaching the end of the list. More precisely,
recovery contains
-l, if nothing is to be done when the list starting at l is exhausted
+l, if the token list starting at l is to be destroyed upon completion
l lsh infod + p, if the token list starting at l denotes a macro body
and the parstack is to be pruned until parptr=p.
Macro parameters are kept on parstack, which grows at a different rate than
the others. This stack is maintained with convention #2;
internaldef parsize=13 # max number of simultaneous parameters;
internal integer array parstack[0:parsize-1] # token-list pointers for parameters;
internal integer parptr # first unused location in parstack;
comment Tokens, token lists, and the diagnostic routine dumplist;
comment A token is either a character or end-paragraph code or control
sequence found in some character file. Sometimes TEX considers tokens
to be a pair (cmd,char) of command and character, but sometimes it
considers these as a unit in packed form;
internaldef chars=9,chard=0 # definition of char field in packed tokens;
internaldef cmds=4,cmdd=chars # definition of cmd field in packed tokens;
comment The cmd field of a token never exceeds 14 (at least the way the codes
are now), and never equals carret. We must have hashsize+127 < 2↑chars;
comment Control sequence tokens are represented by the packed pair (0,hashentry)
where hashentry is the index in eqtb for the control sequence. Since 0 is the
cmd code for an escape character, there is no ambiguity, as an escape by
itself does not constitute a token.
A token list is a singly-linked list of one-word nodes, containing packed
tokens in their info fields. Macro definitions and output-routine definitions
and marks are stored as token lists preceded by a reference-count node.
Two special commands appear in the token lists of macro definitions:
match [char=0 means match a parameter, char=1 means end of matching]
outpar [output parameter number char+1].
The enclosing { and } of the right-hand side of a macro definition are ommitted.
The final } of an output or mark definition is included in the tokenlist.
The following example macro definition illustrates these conventions:
\def\mac a#1#2 \b {#1\:a ##1#2 #2}
is represented by a token list containing
(ref ct), \mac, a, match0, match0, (space), \b, match1,
outpar0, \:, a, (space), #, 1, outpar1, (space), outpar1.
Note that the macro name appears just after the reference count, this is
for error messages. Procedure macrodef builds such token lists, and
macrocall uses them.
Examples such as
\def \m {\def \m {a} b}
explain why a reference counter is needed: The eqtb entry for \m is
changed before the token list for m has been consumed, hence we can't
simply destroy the token list when \m is redefined.
The procedure dumplist illustrates the above conventions. It is used
for diagnostic purposes;
internal string array tokstring[0:1] # output of displaylist;
internal procedure dumplist(integer p,q) # makes strings out of a token list;
begin comment This procedure is used for diagnostic messages. It creates two
strings from the token list pointed to by p, namely tokstring[0] for all
tokens up to but not including the one pointed to by q, and tokstring[1]
for the remaining tokens if any. For example, if p points to the node \mac
in the above example and if q points to the second "a", the result will be
tokstring[0]="\mac a#1#2 \b →#1\: "
tokstring[1]="a ##1#2 #2".
No reference counters should be in the list pointed to by p;
integer j # 0 until q is reached, then 1;
integer cmd,char,t,npars; string s;
tokstring[0]←tokstring[1]←null; j←0; npars←"0";
while p do
begin if p=q then j←1;
t←info(p); cmd←field(cmd,t); char←field(char,t);
case cmd of begin
[0] s←escapechar&idname(char)&" ";
[match] if char=0 then s←"#"&(npars←npars+1) else s←"→";
[outpar] s←"#"&cvs(char+1);
[macprm] s←"##";
[spacer] s←" ";
[parend] s←escapechar&"par "
[lbrace][rbrace][mathbr][tabmrk][carret][supmrk][submrk][letter][otherchar]
s←char;
else confusion
end;
tokstring[j]←tokstring[j]&s;
end;
end;
comment Maintaining the input stacks: pushinput,popinput,initin,dumpcontext,inslist;
internal procedure pushinput # save current input status on the stacks;
if inptr≥stacksize then overflow(stacksize) else
begin inbufstack[inptr]←inbuf;
curbufstack[inptr]←curbuf;
filenamestack[inptr]←filename;
statestack[inptr]←state;
locstack[inptr]←loc;
recoverystack[inptr]←recovery;
inptr←inptr+1;
end;
internaldef inslist(p)=⊂begin pushinput;state←tokenlist;loc←recovery←p end⊃;
comment The above inserts the tokenlist pointed to by p into the input stream
and sets things up so the token list is destroyed afterwards;
procedure insrclist(integer l) # like inslist for lists with reference counts;
begin pushinput; state←tokenlist;
if curchar then loc←topmark else loc←botmark;
recovery←(l lsh infod)+parptr; loc←link(l);
mem[l]←mem[l]+refct1;
end;
internal procedure popinput # finish current input level and restore the previous;
begin integer t;
inptr←inptr-1;
inbuf←inbufstack[inptr];
curbuf←curbufstack[inptr];
filename←filenamestack[inptr];
state←statestack[inptr];
loc←locstack[inptr];
recovery←recoverystack[inptr];
end;
define crffbreak=1,ffbreak=2 # break table codes, see below;
internal integer brchar # break character stored by system input;
internal integer eof # end-of-file code stored by system input;
internal procedure initin # get TEX input system ready to start;
begin setbreak(crffbreak,'15&'14,null,"INA") # crffbreak will now read the
input up to and including a carriage return or page mark,
ignoring oldstyle line numbers;
setbreak(ffbreak,'14,null,"INS") # ffbreak is used only to read past a
file directory page, it goes up to the first page mark;
inptr←0 # set input stacks empty;
state←newline;
inbuf←curbuf←filename←null;
recovery←0;
end;
internal string currentfile # current input file name, set by dumpcontext;
internal integer currentpage,currentline # set by dumpcontext;
internal procedure dumpcontext # prints where the scanner is;
begin comment This procedure shows the top levels of input, omitting
tokenlists that are about to be flushed (since they were most likely
inserted with inslist), until coming to a level that is a character file;
integer ptr,t; string lf; lf←'12 # line-feed symbol;
ptr←inptr;
inbufstack[ptr]←inbuf;
curbufstack[ptr]←curbuf;
filenamestack[ptr]←filename;
statestack[ptr]←state;
locstack[ptr]←loc;
recoverystack[ptr]←recovery;
while statestack[ptr]=tokenlist do
begin label advance;
if(t←recoverystack[ptr])<0 then
begin t←-t; print(newline,"<parameter> ");
end
else if(t←field(info,t))then
begin comment macrocall or output routine or mark;
t←link(t) # bypass reference count;
print(newline);
end
else go to advance # tokenlist to be flushed;
dumplist(t,locstack[ptr]);
print(tokstring[0][∞-31 to ∞],lf,tokstring[1][1 to 32]);
advance: ptr←ptr-1;
end;
currentfile←filenamestack[ptr];
currentpage←field(info,locstack[ptr]);
currentline←field(link,locstack[ptr]);
if ptr then print(newline,"p.",currentpage,",l.",currentline," ")
else print(newline,"(*) ");
if inbufstack[ptr] = '12 then t←2 else t←1 # ignore initial linefeed;
print(inbufstack[ptr][t to (∞-length(curbufstack[ptr]))],lf,
curbufstack[ptr],newline);
end;
comment The basic input procedure getnext and its cousins gettok,getncnext,getnctok;
internal simple procedure getnext # sends next input token to curcmd,curchar;
begin comment Although this procedure has to handle a lot of cases, note that
its inner loop is reasonably short and fast;
label switch; integer t,p;
switch: if state≠tokenlist then
begin comment reading an external file;
label innerswitch;
innerswitch:if(curchar←lop(curbuf))then
case state+(curcmd←chartype(curchar)) of begin
comment Now curcmd and curchar are set, but we may have to do special
actions. This case statement tells what to do for each
combination of state and curcmd, except when there's nothing to do;
[midline+spacer] state←skipblanks;
[midline+carret] begin state←newline;curbuf←null;curcmd←spacer end;
[midline+ignore][skipblanks+ignore][newline+ignore][skipblanks+spacer]
[newline+spacer] go to innerswitch # ignore the character;
[midline+escape][skipblanks+escape][newline+escape] begin
controlseq;
curcmd←field(idcmd,eqtb[hashentry]);
curchar←field(link,eqtb[hashentry]);
state←skipblanks end;
[skipblanks+lbrace][skipblanks+rbrace][skipblanks+mathbr][skipblanks+tabmrk]
[skipblanks+macprm][skipblanks+supmrk][skipblanks+submrk][skipblanks+letter]
[skipblanks+otherchar][newline+lbrace][newline+rbrace][newline+mathbr]
[newline+tabmrk][newline+macprm][newline+supmrk][newline+submrk]
[newline+letter][newline+otherchar] state←midline;
[skipblanks+carret] begin state←newline;curbuf←null;go to innerswitch end;
[newline+carret] begin curbuf←null; curcmd←endpar end;
else comment do nothing;
end
else begin comment curbuf is empty, must go to next line of file;
if inptr then
begin comment reading a character file;
inbuf←input(recovery,crffbreak) #
read file up to carriage return or form feed;
if eof then
begin comment done with reading a file;
comment The contents of inbuf can be ignored;
print(")");
release(recovery) # deactivate the channel;
popinput # restore previous status;
curcmd←endpar;return # end-of-file ends a paragraph;
end;
if brchar='14 then
begin comment page mark,inbuf can be ignored;
p←field(info,loc)+1 # advance page number;
print(" ",p) # print progress report for user;
loc ← p lsh infod # reset line number to zero;
end
else loc←loc+1 # advance line number;
comment No attempt is made here to remember the line
numbers on old style editing systems;
end
else begin comment reading online from terminal;
print(newline,"*") # prompt user for input;
inbuf←inchwl&'15 # append carriage-return deleted by system;
if escapechar=0 and (inbuf≠'15) then
begin escapechar←inbuf # first char input is the \;
chartype(escapechar)←escape;
end;
end;
go to innerswitch;
end
end
else begin comment traversing a tokenlist;
if loc then
begin t←info(loc) # get token to emit;
loc←link(loc) # advance to next element of token list;
curchar←field(char,t);
if(curcmd←field(cmd,t))=outpar then
begin comment insert a macro parameter;
pushinput;
loc←parstack[field(link,recovery)+curchar];
recovery←-loc;
comment The state remains at tokenlist;
go to switch;
end;
end
else begin comment end of tokenlist;
if recovery>0 then
begin if recovery < (1 lsh infod) then destroylist(recovery)
else begin t←field(info,recovery);
comment end of macro body, t points to its reference count;
deleterclink(t);
t←field(link,recovery) # now t is desired setting of parptr;
while parptr>t do
begin parptr←parptr-1;
destroylist(parstack[parptr]);
end;
end
else destroylist(recovery);
end;
popinput; go to switch;
end;
end;
end;
comment Three other routines are often used instead of getnext, namely:
gettok, which not only sets curcmd and curchar but also "curtok",
a packed version of the corresponding input token.
getncnext, meaning get non-call, which is like getnext but
if the current token is a user-defined control sequence
(i.e., a macro call) it is eliminated from the input;
getnctok, like getncnext but also sets curtok;
internal integer curtok # current token set by gettok and getnctok;
procedure gettok # set curcmd, curchar, and curtok;
begin hashentry←-1;
getnext;
if hashentry<0 then curtok←(curcmd lsh cmdd)+curchar else curtok←hashentry;
end;
internal simple procedure getncnext # gets next non-call input token;
while true do
begin getnext;
if curcmd≠call then return else macrocall;
end;
internal simple procedure getnctok # get next non-call token and sets curtok;
while true do
begin hashentry←-1;
getnext;
if hashentry<0 then
begin curtok←(curcmd lsh cmdd)+curchar; return;
end
else begin curtok←hashentry;
if curcmd≠call then return;
end;
end;
comment Defining user control sequences and output routines: macrodef,scantoks;
internal procedure macrodef(integer gdef);
begin comment "\def" or "\gdef" has just been scanned. This procedure scans the
macro definition and constructs the corresponding token list as described earlier;
integer npars # number of parameters (as ascii character);
integer p # pointer to previous node in linked list;
integer q # pointer to current node in linked list;
integer item # current entry to be appended to linked list;
define storeitem=⊂begin p←q; getavail(q);mem[p]←(item lsh infod)+q;end⊃ #
stores the previous item and makes it point to the current node;
integer unbal # count of {'s minus }'s in right-hand side of definition;
integer defplace # eqtb entry to define;
integer listhead # pointer to reference counter at the beginning of the list;
label finishup # the definition has been scanned;
hashentry←-1; getnext;
if (defplace←hashentry)<0 then
begin error("You can only define a control sequence"); return;
end;
if curcmd≥assignreal and curcmd≤font then
begin error("You can't redefine this control sequence"); return;
getavail(listhead); p←listhead;
getavail(q); mem[listhead]←q # initialize reference counter;
item ← defplace # first entry on list will point back to the eqtb;
npars←"0" # number of parameters seen so far;
while true do
begin gettok # set curcmd, curchar, curtok;
storeitem # store previous item and make room for a new one;
if curcmd=lbrace or curcmd=rbrace then done;
if curcmd≠macprm then item←curtok
else begin comment a new parameter to be matched when this macro called;
getnext; if curchar≠(npars←npars+1) then
error("Parameters must be numbered consecutively");
if npars>("0"+parsize) then overflow(parsize);
comment The previous statement guarantees that pstack, in
the macrocall procedure, will never overflow;
item←match lsh cmdd # store a match0 command;
end;
end;
item←(match lsh cmdd)+1 # store a match1 command;
if curcmd=rbrace then
begin error("Missing {");
go to finishup;
end;
comment Now curcmd=lbrace, scan the right-hand side;
unbal←1;
while true do
begin gettok;
if curcmd=rbrace then
begin unbal←unbal-1;
if unbal=0 then done;
end
else if curcmd=lbrace then unbal←unbal+1;
storeitem;
if curcmd≠macprm then item←curtok
else begin "#" sensed, look for two in a row;
getnext;
if curcmd≠macprm then
begin comment not two in a row, means parameter output;
if curchar>npars or curchar<"1" then
begin error("Illegal parameter number in "&
"definition of "&escapechar&idname(defplace));
curchar←"1" # treat as #1;
end;
item←((outpar lsh cmdd)-"1")+curchar;
end
else item←curtok;
end;
end;
finishup: comment Now the definition has been scanned, and item contains
the final token to be stored;
mem[q]←item lsh cmdd;
if gdef then
begin setufield(idlev,eqtb[defplace],level1);
q←curlev; curlev←level1 # temporarily switch to level 1;
end;
eqdefine(defplace,call,listhead) # set eqtb entry;
if gdef then curlev←q;
end;
internal integer procedure scantoks # build tokenlist for output and mark;
begin comment "\output" or "\mark" has just been scanned. This procedure
builds a token list somewhat like the token list of a macro definition,
but without parameters, and including the final } but not the initial {
of the token group, then it returns a pointer to the reference count
heading this list;
integer p # pointer to previous node in linked list;
integer q # pointer to current node in linked list;
integer item # current entry to be appended to linked list;
define storeitem=⊂begin p←q; getavail(q);mem[p]←(item lsh infod)+q;end⊃ #
stores the previous item and makes it point to the current node;
integer unbal # count of {'s minus }'s in right-hand side of definition;
integer listhead # pointer to reference counter at the beginning of the list;
getavail(listhead); q←listhead; item←0 # initialize reference counter;
unbal←0;
while true do
begin gettok;
storeitem; item←curtok;
if curcmd=rbrace then
begin unbal←unbal-1;
if unbal≤0 then done;
end
else if curcmd=lbrace then unbal←unbal+1;
end;
if unbal<0 then error("Missing {");
mem[q]←item lsh cmdd # store final rbrace;
return(listhead);
end;
comment Calling user macros: macrocall;
internal procedure macrocall # invoke a user-defined control sequence;
begin comment "\mac" has just been scanned, where \mac is a control sequence
previously defined with \def. The body of its definition is a tokenlist
beginning with the reference counter in location curchar,
and it has the form described above in the discussion of token lists.
This procedure first scans to find the parameters, placing them in the
auxiliary stack pstack (since the parstack may be losing entries during
this matching process). Then the parameters are placed on parstack and
the right-hand side of the macro body is fed to the scanner;
integer refcount # points to the reference count;
integer defplace # points to the index of \mac in eqtb;
integer npars # number of parameters scanned;
integer p # pointer to previous node in linked list;
integer q # pointer to current node in linked list;
integer item # current entry to be appended to linked list;
define storeitem=⊂begin p←q; getavail(q);mem[p]←(item lsh infod)+q;end⊃ #
stores the previous item and makes it point to the current node;
integer unbal # count of {'s minus }'s in parameter being matched;
integer ngrps # number of tokens or {} groups in parameter being matched;
integer r # pointer to current node in macro body;
integer t # current token of interest;
defplace←hashentry;
r←link(link(refcount←curchar)) # point to first item after \mac token;
comment defplace should equal info(link(refcount));
npars←0;
if tracing land 1 then
begin comment tracing macro calls;
dumplist(link(refcount),0);
print(newline,toklist[0]);
end;
while (t←info(r))#((match lsh cmdd)+1) do
begin r←link(r);
if t ≠ match lsh cmdd then
begin comment input must match token t;
gettok;
if curtok≠t and firsterror then
begin firsterror←false;
error("Use of "&escapechar&idname(defplace)&
" doesn't match its definition");
end;
end
else begin "findparameter";
if ufield(cmdd,t←info(r)) = match lsh cmdd then
begin comment undelimited parameter;
t←-1;
end
else begin comment parameter delimited by t;
r←link(r);
end;
q←temphead; item←0 # mem[temphead] will point to tokenlist created;
ngrps←0;
gettok # set curtok to next input token;
while curtok≠t do
begin storeitem; item←curtok;
if curcmd=lbrace then
begin comment scan a {} group;
unbal←1;
while true do
begin gettok;
storeitem; item←curtok;
if curcmd=rbrace then
begin unbal←unbal-1;
if unbal=0 then done;
end
else if curcmd=lbrace then unbal←unbal+1;
end;
end;
ngrps←ngrps+1;
if t<0 then done else gettok;
if curcmd=parend then error("Use of "&escapechar&
idname(defplace)&" has paragraph-end in #"&
(npars+"1"));
end;
if ngrps=1 and curcmd=rbrace then
begin comment strip off enclosing braces;
mem[p]←mem[p] land (-1 lsh infod) # zero the link field;
freeavail(q);
pstack[npars]←link(mem[temphead]);
freeavail(mem[temphead]);
end
else begin comment attach final symbol to list;
mem[q]←item lsh infod;
pstack[npars]←mem[temphead];
end;
if tracing land 1 then
begin dumplist(pstack[npars],0);
print(newline,"#"&(npars+"1"),"←",tokstring[0]);
end;
npars←npars+1;
end "findparameter";
end;
comment Now matching and parameter building are complete, and link(r) points
to the right-hand side of the macro definition;
if parptr+npars>parsize then overflow(parsize);
for q←0 thru npars-1 do parstack[parptr+q]←pstack[q];
pushinput # prepare to insert macrobody in input;
loc←link(r);
state←tokenlist;
recovery←(refcount lsh infod)+parptr;
parptr←parptr+npars;
mem[refcount]←mem[refcount]+refct1 # increase reference count;
end;
comment Accessing user's files: requirefile, definefont;
comment This page contains the most operating-system dependent aspects
of the TEX input system;
string array fname[0:2] # file name, extension, and directory;
procedure scanfilename # sets up fname[0:2];
begin integer j;
fname[0]←fname[1]←fname[2]←null;
j←0;
while true do
begin getnctok;
if curcmd = spacer then done;
if curchar = "." then j←1
else if curchar = "[" then j←2;
fname[j]←fname[j]&curchar;
end;
end;
internal procedure requirefile;
begin comment "\require" has just been scanned. This procedure scans
the user's file name, employing the SAIL naming conventions, then reads
in the first line and feeds it to the input system;
string array fname[0:2] # name,extension,directory;
integer chan;
label abort # if something goes wrong trying to read the file;
define checkeof=⊂if eof then begin print(")");go to abort end⊃;
scanfilename;
if fname[1]=0 then fname[1]←".TEX";
pushinput # save present file status;
filename←fname[0]&fname[1]&fname[2];
open(chan←getchan,"DSK",0,if inptr=1 then 19 else 2, 0,
100,brchar,eof);
state←newline; recovery←chan;
comment On the SAIL system, 19 buffers is the most efficient for disk files;
lookup(chan,filename,eof);
if eof then
begin error("Lookup failed on file "&filename);go to abort;
end;
print("(",filename);
inbuf←input(chan,crffbreak) # get first line of file;
checkeof; print(" 1");
if equ(inbuf[1 to 9],"COMMENT ⊗") then
begin comment Skip TVedit directory page;
while brchar≠'14 and not eof do inbuf←input(chan,ffbreak);
checkeof;
inbuf←input(chan,crffbreak) # get first line of second page;
checkeof; print(" 2");
loc ← (2 lsh infod) + 1 # page 2 line 1;
end
else loc ← (1 lsh infod) + 1 # page 1 line 1;
curbuf←inbuf;
comment Now define the output file name if it hasn't yet been defined;
if outfilename=0 then outfilename←fname[0]&outfileext&fname[2];
return;
abort: release(chan);
popinput;
end;
internal procedure definefont(integer f) # Do this after seeing "=" of font def;
begin integer n,p,chan; string s;
scanfilename;
fontname[f]←fname[0]&fname[2];
open(chan←getchan,"DSK",8,1,0,0,eof);
lookup(chan,s←fname[0]&".TFD&fname[2],eof);
if eof then
begin error("Lookup failed on file "&s); quit;
end;
readtfd(chan,f) # input the font info needed by semantics and output routines;
release(chan);
end;
comment Basic scanning routines: backinput,scandigit,scanlb,scanstring,scannumber;
internal procedure backinput # puts curtok back into the input;
begin comment When using this procedure, be sure to have called gettok or getnctok
instead of getnext or getncnext;
integer p;
getavail(p);
mem[p]←curtok lsh infod # create a tokenlist of length 1;
inslist(p);
end;
internal integer procedure scandigit # scans "0"..."9";
begin comment If the next input token is a digit, this procedure returns that
digit (in ascii code). Otherwise this procedure gives an error message and
returns "0";
getnctok;
if curtok<(otherchar lsh cmdd)+"0" or curtok>(otherchar lsh cmdd)+"9" then
begin backerror("Digit (0 to 9) required here");
curchar←"0";
end;
return(curchar);
end;
internal procedure scanlb # scans {;
begin comment If the next input token is not a left brace delimiter,
this procedure gives an error message. Routines using this procedure
assume that a left brace is present;
getnctok;
if curcmd≠lbrace then backerror("Missing {");
end;
internal boolean procedure scanstring(string s) # scans a given letter string;
begin comment Here s is a string of letters. This procedure returns
true and removes s if the next characters of the input agree with s,
otherwise it returns false and effectively leaves the input unchanged;
string ss; integer c,q,p,head;
ss←s;
while c←lop(ss) do
begin getnctok;
if curtok≠(letter lsh cmdd)+c then
begin comment match failed, we construct a token string to insert;
getavail(q); head←q;
while length(s)≠length(ss)+1 do
begin p←q; getavail(q);
mem[p]←((lop(s)+(letter lsh cmdd))lsh infod)+q;
end;
mem[q]←curtok lsh infod;
inslist(head);
return(false);
end;
end;
return(true);
end;
internal integer nbrlength # length of scanned number;
internal integer nbrsign # sign, if any, preceding scanned number;
internal integer procedure scannumber # scans a decimal or octal number;
begin comment This procedure removes from the input a string of the form
space* [+ space* | - space*] ['] digit* [space]
where ' denotes octal radix, and returns the corresponding decimal or octal
value of the digit string. Global variable nbrlength is set to the
number of digits, and nbrsign is set to "+" or "-" if a sign appeared;
integer n,radix;
n←nbrsign←nbrlength←0;
do getnctok until curcmd≠spacer;
if curtok=(otherchar lsh cmdd)+"+" or curtok=(otherchar lsh cmdd)+"-" then
begin nbrsign←curchar;
do getnctok until curcmd≠spacer;
end;
if curtok≠(otherchar lsh cmdd)+"'" then radix←10
else begin radix←8; getnctok;
end;
while curtok≥(otherchar lsh cmdd)+"0" and curtok≥(otherchar lsh cmdd)+"9" do
begin n←radix*x+curchar-"0";
nbrlength←nbrlength+1;
getnctok;
end;
if curcmd≠spacer then backinput;
return(n);
end;
comment Further scanning routines: scanlength,scanglue,scanspec;
internal real procedure scanlength # scans a length specification;
begin comment This procedure scans the input for
<number> [. <number>] <units> [space]
and returns the corresponding value in points;
comment If the number after the decimal point is octal or signed,
no error is detected but the result may be unusual;
integer n; real x,sign;
x←scannumber;
if nbrprefix="-" then sign←-1.0 else sign←+1.0;
getnctok;
if curtok=(otherchar lsh cmdd)+"." then
begin n←scannumber;
x←x+n/10.0↑nbrlength;
end
else backinput;
if scanstring("pt") then comment already in points;
else if scanstring("in") then x←x/0.013837
else if scanstring("pc") then x←x*12.0
else if scanstring("cm") then x←x/(0.013837*2.54)
else if scanstring("mm") then x←x/(0.013837*25.4)
else if scanstring("dd") then x←x*(0.0148/0.013837)
else error("Illegal units of measure");
getnctok; if curcmd≠spacer then backinput;
return(x*sign);
end;
internal integer procedure scanglue # scans a glue specification;
begin comment This procedure scans the input for
<length> [plus <length>] [minus <length>]
and returns a pointer to a new glue node having these parameters;
integer p;
p←getnode(gluespecsize);
gluespace(p)←scanlength;
if scanstring("plus") then gluestretch(p)←scanlength else gluestretch(p)←0.0;
if scanstring("minus") then glueshrink(p)←scanlength else glueshrink(p)←0.0;
return(p);
end;
internal procedure scanspec # scans a justification specification and a {;
begin comment
If the input is then this procedure puts on savestack
to [space] size space* { hsize or vsize (acc. to current mode)
to <length> space* { value(<length>)
expand <length> space* { - value(<length>)
space* { - 0.000001;
real v;
if scanstring("to") then
begin getnctok; if curcmd≠spacer then backinput;
if scanstring("size") then
if mode=-vmode then v←memreal(eqlink(vsize))
else v←memreal(eqlink(hsize));
else v←scanlength;
end
else if scanstring("expand") then
begin v← -scanlength; if v≥0 then v←-.000001;
end
else v←-.000001;
while curcmd=spacer do getnctok;
if curcmd≠lbrace then backerror("Missing {");
savestack[saveptr]←v;
saveptr←saveptr+1 # It's not necessary to check for stack overflow here;
end;
comment Still more scanning routines: passblock,insnum,scancond;
internal procedure passblock # scans past an entire {} block;
begin integer unbal;
unbal←0;
while true do
begin getnext;
if curcmd=rbrace then
begin unbal←unbal-1;
if unbal≤0 then done;
end
else if curcmd=lbrace then unbal←unbal+1;
end;
if unbal<0 then error("Missing {");
end;
preload_with 1000,500,100,50,10,5,1;integer array romval[1:7];
define lt(x)=⊂((letter lsh cmdd)+"x")lsh infod⊃;
preload_with lt(m),lt(d),lt(c),lt(l),lt(x),lt(v),lt(i);integer array romtok[1:7];
internal procedure insnum(integer n) # puts string version of n into input;
begin comment if n is negative, the Roman numeral value of n is placed
into the input stream, otherwise the decimal value of n is placed there;
begin integer p,q;
if n≥0 then
begin comment decimal number, build tokenlist from right to left;
p←0;
do begin getavail(q);
mem[q]←((n mod 10)+("0"+(otherchar lsh cmdd))lsh infod)+p;
p←q; n←n div 10;
end until n=0;
end
else begin comment roman numeral, build tokenlist from left to right;
integer item,j,k;
p←temphead; item←0;
j←1; n←-n;
while true do
begin while n≥romval[j] do
begin getavail(q); mem[p]←item+q;
p←q; item←romtok[j];
n←n-romval[j];
end;
if n=0 then done;
k←j+1+(j land 1) # m,d → c c,l → x x,v → i;
if n+romval[k]≥romval[j] then
begin getavail(q); mem[p]←item+q;
p←q; item←romtok[k];
n←n+romval[k];
end
else j←j+1;
end;
mem[p]←item;
p←mem[temphead] # p points to the tokenlist;
end;
inslist(p);
end;
internal procedure scancond(boolean b) # scanning for if-then-else constructs;
if b then
begin scanlb # must find {;
newsavelevel(trueend);
end
else begin passblock # skip the true part;
getnctok;if curcmd≠elsecode then backerror(escapechar&"else required here");
end;